DataFrames Julia Introduction Tutorial Solution

This is one possible solution to the exercises in the DataFrames tutorial.

using DataFrames
using CSV
[ Info: Precompiling DataFrames [a93c6f00-e57d-5684-b7b6-d8193f3e46c0]
# Read the reduced ATLAS Higgs challenge CSV file into a DataFrame
higgs_csv = joinpath("..", "assets", "atlas-higgs-challenge-2014-v2-reduced.csv")
higgs_ml = CSV.read(higgs_csv, DataFrame)
50000×35 DataFrame
49975 rows omitted
RowEventIdDER_mass_MMCDER_mass_transverse_met_lepDER_mass_visDER_pt_hDER_deltaeta_jet_jetDER_mass_jet_jetDER_prodeta_jet_jetDER_deltar_tau_lepDER_pt_totDER_sum_ptDER_pt_ratio_lep_tauDER_met_phi_centralityDER_lep_eta_centralityPRI_tau_ptPRI_tau_etaPRI_tau_phiPRI_lep_ptPRI_lep_etaPRI_lep_phiPRI_metPRI_met_phiPRI_met_sumetPRI_jet_numPRI_jet_leading_ptPRI_jet_leading_etaPRI_jet_leading_phiPRI_jet_subleading_ptPRI_jet_subleading_etaPRI_jet_subleading_phiPRI_jet_all_ptWeightLabelKaggleSetKaggleWeight
Int64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Int64Float64Float64Float64Float64Float64Float64Float64Float64String1String1Float64
1100000138.4751.65597.82727.980.91124.7112.6663.06441.928197.761.5821.3960.232.6381.0170.38151.6262.273-2.41416.824-0.277258.733267.4352.150.44446.0621.24-2.475113.4970.00081448st0.00265331
2100001160.93768.768103.23548.146-999.0-999.0-999.03.4732.078125.1570.8791.414-999.042.0142.039-3.01136.9180.5010.10344.704-1.916164.546146.2260.7251.158-999.0-999.0-999.046.2260.681042bt2.23358
3100002-999.0162.172125.95335.635-999.0-999.0-999.03.1489.336197.8143.7761.414-999.032.154-0.705-2.093121.409-0.9531.05254.283-2.186260.414144.2512.053-2.028-999.0-999.0-999.044.2510.715742bt2.34739
4100003143.90581.41780.9430.414-999.0-999.0-999.03.310.41475.9682.354-1.285-999.022.647-1.6550.0153.321-0.522-3.131.0820.0686.0620-999.0-999.0-999.0-999.0-999.0-999.0-0.01.66065bt5.44638
5100004175.86416.915134.80516.405-999.0-999.0-999.03.89116.40557.9831.056-1.385-999.028.209-2.197-2.23129.7740.7981.5692.723-0.87153.1310-999.0-999.0-999.0-999.0-999.0-999.00.01.90426bt6.24533
610000589.74413.5559.149116.3442.636284.584-0.541.36261.619278.8760.5880.4790.97553.6510.3711.32931.565-0.8841.85740.7352.237282.849390.547-2.412-0.65356.1650.2243.106193.660.0254338bt0.083414
7100006148.75428.862107.782106.130.733158.3590.1132.9412.545305.9673.3711.3930.79128.851.1132.40997.240.675-0.96638.421-1.443294.0742123.010.8641.4556.8670.131-2.767179.8770.00081448st0.00265331
8100007154.91610.41894.71429.169-999.0-999.0-999.02.8971.526138.1780.365-1.305-999.078.80.6541.54728.740.506-1.34722.275-1.761187.299130.638-0.715-1.724-999.0-999.0-999.030.6380.00572068st0.0186361
9100008105.59450.559100.9894.288-999.0-999.0-999.02.9044.28865.3330.675-1.366-999.039.0082.433-2.53226.3250.211.88437.7910.024129.8040-999.0-999.0-999.0-999.0-999.0-999.00.01.6148bt5.296
10100009128.05388.94169.272193.392-999.0-999.0-999.01.60928.859255.1230.5990.538-999.054.646-1.5330.41632.742-0.317-0.636132.6780.845294.7411167.735-2.767-2.514-999.0-999.0-999.0167.7350.000461025st0.00150187
11100010-999.086.2479.69227.201-999.0-999.0-999.02.33827.20181.7341.75-1.412-999.029.718-0.8662.87852.0160.126-1.28851.2760.688250.1780-999.0-999.0-999.0-999.0-999.0-999.00.00.701141bt2.2995
12100011114.74410.28675.71230.8162.563252.599-1.4012.88836.745239.8041.0611.3640.76935.976-0.669-0.34238.188-0.1652.50222.3852.148290.547376.773-0.790.30356.8761.773-2.079165.640.093659bt0.30717
13100012145.29764.234103.565106.999-999.0-999.0-999.02.18324.66192.2450.5760.689-999.062.89-0.766-1.63236.2370.722-0.03543.91-1.907232.362193.117-0.971.943-999.0-999.0-999.093.1170.51274bt1.68161
4998914998891.59470.57457.61534.452-999.0-999.0-999.02.8433.00989.1381.227-1.401-999.025.4431.4-1.22831.2191.9392.26445.691-0.149111.575132.476-1.0572.877-999.0-999.0-999.032.4760.617144bt2.02402
49990149989-999.068.95776.1912.304-999.0-999.0-999.02.322.30474.6111.072-1.414-999.036.01-1.651-0.45238.601-0.594-2.51740.1811.63396.3910-999.0-999.0-999.0-999.0-999.0-999.00.01.43486bt4.70584
49991149990118.3451.06351.062116.2771.31238.7664.3621.7852.564292.1172.0081.4140.023.0890.32.1246.3560.158-2.38471.4782.98338.4912166.372-2.844-0.14856.3-1.5342.692222.6720.00572068st0.0186361
49992149991131.12758.56685.136151.547-999.0-999.0-999.01.45511.819268.6110.9891.342-999.064.296-1.95-2.07863.568-2.053-0.62657.335-1.639355.6551140.7460.5931.712-999.0-999.0-999.0140.7460.000461282st0.0015027
49993149992-999.095.86490.31721.262-999.0-999.0-999.02.90721.26264.7172.167-1.096-999.020.4320.52.28244.285-1.5790.25151.998-2.795184.1460-999.0-999.0-999.0-999.0-999.0-999.00.00.832855bt2.73148
4999414999399.45281.1373.53970.79-999.0-999.0-999.02.14241.225159.9222.9911.194-999.024.1081.616-2.42472.1081.407-0.29234.003-2.212216.834163.7052.8842.632-999.0-999.0-999.063.7050.000461282st0.0015027
4999514999459.65367.64850.78324.749-999.0-999.0-999.01.62124.74972.5221.717-1.414-999.026.691-1.067-2.87245.83-0.939-1.25629.4111.087177.0770-999.0-999.0-999.0-999.0-999.0-999.00.01.41038bt4.62555
49996149995133.01329.19878.723110.5760.666281.596-0.0731.9656.406501.8540.4181.3980.073.1740.8191.3230.5810.952-0.63651.2070.12553.8573193.2970.14-1.79892.691-0.5261.653398.0990.0254338bt0.083414
49997149996-999.0125.73358.863174.7340.828171.433-0.1391.21634.733305.9292.2340.0070.06230.498-0.341.37568.1360.8691.245134.87-0.186409.8192149.11-0.234-2.23558.1840.5942.188207.2940.720062bt2.36156
49998149997128.49818.58869.90354.6013.932666.91-3.563.0252.339261.0350.844-1.3980.55938.094-0.936-3.10632.158-0.9480.15261.561-0.269348.6252122.3-1.4143.04368.4832.5180.046190.7830.000461282st0.0015027
49999149998151.11370.10693.9914.145-999.0-999.0-999.03.44.14579.2361.7111.405-999.029.2250.7460.52150.012.074-2.60924.5890.476116.5390-999.0-999.0-999.0-999.0-999.0-999.00.00.00572068st0.0186361
50000149999104.2118.26858.43880.275-999.0-999.0-999.02.13531.245106.0910.8511.368-999.030.6450.8592.11126.094-0.4840.45143.2011.002161.614149.353-2.641-2.038-999.0-999.0-999.049.3530.000461282st0.0015027
# Before `missing` can be stored in a column, the column's element type has to
# allow it: `allowmissing!` widens every column in place (Float64 -> Float64?, ...)
allowmissing!(higgs_ml)
50000×35 DataFrame
49975 rows omitted
RowEventIdDER_mass_MMCDER_mass_transverse_met_lepDER_mass_visDER_pt_hDER_deltaeta_jet_jetDER_mass_jet_jetDER_prodeta_jet_jetDER_deltar_tau_lepDER_pt_totDER_sum_ptDER_pt_ratio_lep_tauDER_met_phi_centralityDER_lep_eta_centralityPRI_tau_ptPRI_tau_etaPRI_tau_phiPRI_lep_ptPRI_lep_etaPRI_lep_phiPRI_metPRI_met_phiPRI_met_sumetPRI_jet_numPRI_jet_leading_ptPRI_jet_leading_etaPRI_jet_leading_phiPRI_jet_subleading_ptPRI_jet_subleading_etaPRI_jet_subleading_phiPRI_jet_all_ptWeightLabelKaggleSetKaggleWeight
Int64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Int64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?Float64?String1?String1?Float64?
1100000138.4751.65597.82727.980.91124.7112.6663.06441.928197.761.5821.3960.232.6381.0170.38151.6262.273-2.41416.824-0.277258.733267.4352.150.44446.0621.24-2.475113.4970.00081448st0.00265331
2100001160.93768.768103.23548.146-999.0-999.0-999.03.4732.078125.1570.8791.414-999.042.0142.039-3.01136.9180.5010.10344.704-1.916164.546146.2260.7251.158-999.0-999.0-999.046.2260.681042bt2.23358
3100002-999.0162.172125.95335.635-999.0-999.0-999.03.1489.336197.8143.7761.414-999.032.154-0.705-2.093121.409-0.9531.05254.283-2.186260.414144.2512.053-2.028-999.0-999.0-999.044.2510.715742bt2.34739
4100003143.90581.41780.9430.414-999.0-999.0-999.03.310.41475.9682.354-1.285-999.022.647-1.6550.0153.321-0.522-3.131.0820.0686.0620-999.0-999.0-999.0-999.0-999.0-999.0-0.01.66065bt5.44638
5100004175.86416.915134.80516.405-999.0-999.0-999.03.89116.40557.9831.056-1.385-999.028.209-2.197-2.23129.7740.7981.5692.723-0.87153.1310-999.0-999.0-999.0-999.0-999.0-999.00.01.90426bt6.24533
610000589.74413.5559.149116.3442.636284.584-0.541.36261.619278.8760.5880.4790.97553.6510.3711.32931.565-0.8841.85740.7352.237282.849390.547-2.412-0.65356.1650.2243.106193.660.0254338bt0.083414
7100006148.75428.862107.782106.130.733158.3590.1132.9412.545305.9673.3711.3930.79128.851.1132.40997.240.675-0.96638.421-1.443294.0742123.010.8641.4556.8670.131-2.767179.8770.00081448st0.00265331
8100007154.91610.41894.71429.169-999.0-999.0-999.02.8971.526138.1780.365-1.305-999.078.80.6541.54728.740.506-1.34722.275-1.761187.299130.638-0.715-1.724-999.0-999.0-999.030.6380.00572068st0.0186361
9100008105.59450.559100.9894.288-999.0-999.0-999.02.9044.28865.3330.675-1.366-999.039.0082.433-2.53226.3250.211.88437.7910.024129.8040-999.0-999.0-999.0-999.0-999.0-999.00.01.6148bt5.296
10100009128.05388.94169.272193.392-999.0-999.0-999.01.60928.859255.1230.5990.538-999.054.646-1.5330.41632.742-0.317-0.636132.6780.845294.7411167.735-2.767-2.514-999.0-999.0-999.0167.7350.000461025st0.00150187
11100010-999.086.2479.69227.201-999.0-999.0-999.02.33827.20181.7341.75-1.412-999.029.718-0.8662.87852.0160.126-1.28851.2760.688250.1780-999.0-999.0-999.0-999.0-999.0-999.00.00.701141bt2.2995
12100011114.74410.28675.71230.8162.563252.599-1.4012.88836.745239.8041.0611.3640.76935.976-0.669-0.34238.188-0.1652.50222.3852.148290.547376.773-0.790.30356.8761.773-2.079165.640.093659bt0.30717
13100012145.29764.234103.565106.999-999.0-999.0-999.02.18324.66192.2450.5760.689-999.062.89-0.766-1.63236.2370.722-0.03543.91-1.907232.362193.117-0.971.943-999.0-999.0-999.093.1170.51274bt1.68161
4998914998891.59470.57457.61534.452-999.0-999.0-999.02.8433.00989.1381.227-1.401-999.025.4431.4-1.22831.2191.9392.26445.691-0.149111.575132.476-1.0572.877-999.0-999.0-999.032.4760.617144bt2.02402
49990149989-999.068.95776.1912.304-999.0-999.0-999.02.322.30474.6111.072-1.414-999.036.01-1.651-0.45238.601-0.594-2.51740.1811.63396.3910-999.0-999.0-999.0-999.0-999.0-999.00.01.43486bt4.70584
49991149990118.3451.06351.062116.2771.31238.7664.3621.7852.564292.1172.0081.4140.023.0890.32.1246.3560.158-2.38471.4782.98338.4912166.372-2.844-0.14856.3-1.5342.692222.6720.00572068st0.0186361
49992149991131.12758.56685.136151.547-999.0-999.0-999.01.45511.819268.6110.9891.342-999.064.296-1.95-2.07863.568-2.053-0.62657.335-1.639355.6551140.7460.5931.712-999.0-999.0-999.0140.7460.000461282st0.0015027
49993149992-999.095.86490.31721.262-999.0-999.0-999.02.90721.26264.7172.167-1.096-999.020.4320.52.28244.285-1.5790.25151.998-2.795184.1460-999.0-999.0-999.0-999.0-999.0-999.00.00.832855bt2.73148
4999414999399.45281.1373.53970.79-999.0-999.0-999.02.14241.225159.9222.9911.194-999.024.1081.616-2.42472.1081.407-0.29234.003-2.212216.834163.7052.8842.632-999.0-999.0-999.063.7050.000461282st0.0015027
4999514999459.65367.64850.78324.749-999.0-999.0-999.01.62124.74972.5221.717-1.414-999.026.691-1.067-2.87245.83-0.939-1.25629.4111.087177.0770-999.0-999.0-999.0-999.0-999.0-999.00.01.41038bt4.62555
49996149995133.01329.19878.723110.5760.666281.596-0.0731.9656.406501.8540.4181.3980.073.1740.8191.3230.5810.952-0.63651.2070.12553.8573193.2970.14-1.79892.691-0.5261.653398.0990.0254338bt0.083414
49997149996-999.0125.73358.863174.7340.828171.433-0.1391.21634.733305.9292.2340.0070.06230.498-0.341.37568.1360.8691.245134.87-0.186409.8192149.11-0.234-2.23558.1840.5942.188207.2940.720062bt2.36156
49998149997128.49818.58869.90354.6013.932666.91-3.563.0252.339261.0350.844-1.3980.55938.094-0.936-3.10632.158-0.9480.15261.561-0.269348.6252122.3-1.4143.04368.4832.5180.046190.7830.000461282st0.0015027
49999149998151.11370.10693.9914.145-999.0-999.0-999.03.44.14579.2361.7111.405-999.029.2250.7460.52150.012.074-2.60924.5890.476116.5390-999.0-999.0-999.0-999.0-999.0-999.00.00.00572068st0.0186361
50000149999104.2118.26858.43880.275-999.0-999.0-999.02.13531.245106.0910.8511.368-999.030.6450.8592.11126.094-0.4840.45143.2011.002161.614149.353-2.641-2.038-999.0-999.0-999.049.3530.000461282st0.0015027
# We define two methods: one for numbers, where comparing against the -999.0
# placeholder is meaningful, and a fallback for all other values that never
# changes them.
# `missing` is not a `Number`, so it is always handled by the fallback method and
# the numeric method needs no explicit check for it.
missing_value(v::Number) = v == -999.0 ? missing : v
missing_value(v) = v
missing_value (generic function with 2 methods)
# Apply `missing_value` to every row of every column in one call.
# Each transformation writes its result back under the source column's own name,
# so `transform!` replaces the columns in place and the column order is preserved
# without moving columns around one at a time.
column_names = names(higgs_ml)
transform!(higgs_ml, column_names .=> ByRow(missing_value) .=> column_names)
higgs_ml
50000×35 DataFrame
49975 rows omitted
RowEventIdDER_mass_MMCDER_mass_transverse_met_lepDER_mass_visDER_pt_hDER_deltaeta_jet_jetDER_mass_jet_jetDER_prodeta_jet_jetDER_deltar_tau_lepDER_pt_totDER_sum_ptDER_pt_ratio_lep_tauDER_met_phi_centralityDER_lep_eta_centralityPRI_tau_ptPRI_tau_etaPRI_tau_phiPRI_lep_ptPRI_lep_etaPRI_lep_phiPRI_metPRI_met_phiPRI_met_sumetPRI_jet_numPRI_jet_leading_ptPRI_jet_leading_etaPRI_jet_leading_phiPRI_jet_subleading_ptPRI_jet_subleading_etaPRI_jet_subleading_phiPRI_jet_all_ptWeightLabelKaggleSetKaggleWeight
Int64Float64?Float64Float64Float64Float64?Float64?Float64?Float64Float64Float64Float64Float64Float64?Float64Float64Float64Float64Float64Float64Float64Float64Float64Int64Float64?Float64?Float64?Float64?Float64?Float64?Float64Float64String1String1Float64
1100000138.4751.65597.82727.980.91124.7112.6663.06441.928197.761.5821.3960.232.6381.0170.38151.6262.273-2.41416.824-0.277258.733267.4352.150.44446.0621.24-2.475113.4970.00081448st0.00265331
2100001160.93768.768103.23548.146missingmissingmissing3.4732.078125.1570.8791.414missing42.0142.039-3.01136.9180.5010.10344.704-1.916164.546146.2260.7251.158missingmissingmissing46.2260.681042bt2.23358
3100002missing162.172125.95335.635missingmissingmissing3.1489.336197.8143.7761.414missing32.154-0.705-2.093121.409-0.9531.05254.283-2.186260.414144.2512.053-2.028missingmissingmissing44.2510.715742bt2.34739
4100003143.90581.41780.9430.414missingmissingmissing3.310.41475.9682.354-1.285missing22.647-1.6550.0153.321-0.522-3.131.0820.0686.0620missingmissingmissingmissingmissingmissing-0.01.66065bt5.44638
5100004175.86416.915134.80516.405missingmissingmissing3.89116.40557.9831.056-1.385missing28.209-2.197-2.23129.7740.7981.5692.723-0.87153.1310missingmissingmissingmissingmissingmissing0.01.90426bt6.24533
610000589.74413.5559.149116.3442.636284.584-0.541.36261.619278.8760.5880.4790.97553.6510.3711.32931.565-0.8841.85740.7352.237282.849390.547-2.412-0.65356.1650.2243.106193.660.0254338bt0.083414
7100006148.75428.862107.782106.130.733158.3590.1132.9412.545305.9673.3711.3930.79128.851.1132.40997.240.675-0.96638.421-1.443294.0742123.010.8641.4556.8670.131-2.767179.8770.00081448st0.00265331
8100007154.91610.41894.71429.169missingmissingmissing2.8971.526138.1780.365-1.305missing78.80.6541.54728.740.506-1.34722.275-1.761187.299130.638-0.715-1.724missingmissingmissing30.6380.00572068st0.0186361
9100008105.59450.559100.9894.288missingmissingmissing2.9044.28865.3330.675-1.366missing39.0082.433-2.53226.3250.211.88437.7910.024129.8040missingmissingmissingmissingmissingmissing0.01.6148bt5.296
10100009128.05388.94169.272193.392missingmissingmissing1.60928.859255.1230.5990.538missing54.646-1.5330.41632.742-0.317-0.636132.6780.845294.7411167.735-2.767-2.514missingmissingmissing167.7350.000461025st0.00150187
11100010missing86.2479.69227.201missingmissingmissing2.33827.20181.7341.75-1.412missing29.718-0.8662.87852.0160.126-1.28851.2760.688250.1780missingmissingmissingmissingmissingmissing0.00.701141bt2.2995
12100011114.74410.28675.71230.8162.563252.599-1.4012.88836.745239.8041.0611.3640.76935.976-0.669-0.34238.188-0.1652.50222.3852.148290.547376.773-0.790.30356.8761.773-2.079165.640.093659bt0.30717
13100012145.29764.234103.565106.999missingmissingmissing2.18324.66192.2450.5760.689missing62.89-0.766-1.63236.2370.722-0.03543.91-1.907232.362193.117-0.971.943missingmissingmissing93.1170.51274bt1.68161
4998914998891.59470.57457.61534.452missingmissingmissing2.8433.00989.1381.227-1.401missing25.4431.4-1.22831.2191.9392.26445.691-0.149111.575132.476-1.0572.877missingmissingmissing32.4760.617144bt2.02402
49990149989missing68.95776.1912.304missingmissingmissing2.322.30474.6111.072-1.414missing36.01-1.651-0.45238.601-0.594-2.51740.1811.63396.3910missingmissingmissingmissingmissingmissing0.01.43486bt4.70584
49991149990118.3451.06351.062116.2771.31238.7664.3621.7852.564292.1172.0081.4140.023.0890.32.1246.3560.158-2.38471.4782.98338.4912166.372-2.844-0.14856.3-1.5342.692222.6720.00572068st0.0186361
49992149991131.12758.56685.136151.547missingmissingmissing1.45511.819268.6110.9891.342missing64.296-1.95-2.07863.568-2.053-0.62657.335-1.639355.6551140.7460.5931.712missingmissingmissing140.7460.000461282st0.0015027
49993149992missing95.86490.31721.262missingmissingmissing2.90721.26264.7172.167-1.096missing20.4320.52.28244.285-1.5790.25151.998-2.795184.1460missingmissingmissingmissingmissingmissing0.00.832855bt2.73148
4999414999399.45281.1373.53970.79missingmissingmissing2.14241.225159.9222.9911.194missing24.1081.616-2.42472.1081.407-0.29234.003-2.212216.834163.7052.8842.632missingmissingmissing63.7050.000461282st0.0015027
4999514999459.65367.64850.78324.749missingmissingmissing1.62124.74972.5221.717-1.414missing26.691-1.067-2.87245.83-0.939-1.25629.4111.087177.0770missingmissingmissingmissingmissingmissing0.01.41038bt4.62555
49996149995133.01329.19878.723110.5760.666281.596-0.0731.9656.406501.8540.4181.3980.073.1740.8191.3230.5810.952-0.63651.2070.12553.8573193.2970.14-1.79892.691-0.5261.653398.0990.0254338bt0.083414
49997149996missing125.73358.863174.7340.828171.433-0.1391.21634.733305.9292.2340.0070.06230.498-0.341.37568.1360.8691.245134.87-0.186409.8192149.11-0.234-2.23558.1840.5942.188207.2940.720062bt2.36156
49998149997128.49818.58869.90354.6013.932666.91-3.563.0252.339261.0350.844-1.3980.55938.094-0.936-3.10632.158-0.9480.15261.561-0.269348.6252122.3-1.4143.04368.4832.5180.046190.7830.000461282st0.0015027
49999149998151.11370.10693.9914.145missingmissingmissing3.44.14579.2361.7111.405missing29.2250.7460.52150.012.074-2.60924.5890.476116.5390missingmissingmissingmissingmissingmissing0.00.00572068st0.0186361
50000149999104.2118.26858.43880.275missingmissingmissing2.13531.245106.0910.8511.368missing30.6450.8592.11126.094-0.4840.45143.2011.002161.614149.353-2.641-2.038missingmissingmissing49.3530.000461282st0.0015027
using Plots
using StatsPlots
# Split the events into two data frames according to the Label column
# ("s" rows go to `signal`, "b" rows go to `background`)
signal = filter(:Label => ==("s"), higgs_ml)
background = filter(:Label => ==("b"), higgs_ml)
32935×35 DataFrame
32910 rows omitted
RowEventIdDER_mass_MMCDER_mass_transverse_met_lepDER_mass_visDER_pt_hDER_deltaeta_jet_jetDER_mass_jet_jetDER_prodeta_jet_jetDER_deltar_tau_lepDER_pt_totDER_sum_ptDER_pt_ratio_lep_tauDER_met_phi_centralityDER_lep_eta_centralityPRI_tau_ptPRI_tau_etaPRI_tau_phiPRI_lep_ptPRI_lep_etaPRI_lep_phiPRI_metPRI_met_phiPRI_met_sumetPRI_jet_numPRI_jet_leading_ptPRI_jet_leading_etaPRI_jet_leading_phiPRI_jet_subleading_ptPRI_jet_subleading_etaPRI_jet_subleading_phiPRI_jet_all_ptWeightLabelKaggleSetKaggleWeight
Int64Float64?Float64Float64Float64Float64?Float64?Float64?Float64Float64Float64Float64Float64Float64?Float64Float64Float64Float64Float64Float64Float64Float64Float64Int64Float64?Float64?Float64?Float64?Float64?Float64?Float64Float64String1String1Float64
1100001160.93768.768103.23548.146missingmissingmissing3.4732.078125.1570.8791.414missing42.0142.039-3.01136.9180.5010.10344.704-1.916164.546146.2260.7251.158missingmissingmissing46.2260.681042bt2.23358
2100002missing162.172125.95335.635missingmissingmissing3.1489.336197.8143.7761.414missing32.154-0.705-2.093121.409-0.9531.05254.283-2.186260.414144.2512.053-2.028missingmissingmissing44.2510.715742bt2.34739
3100003143.90581.41780.9430.414missingmissingmissing3.310.41475.9682.354-1.285missing22.647-1.6550.0153.321-0.522-3.131.0820.0686.0620missingmissingmissingmissingmissingmissing-0.01.66065bt5.44638
4100004175.86416.915134.80516.405missingmissingmissing3.89116.40557.9831.056-1.385missing28.209-2.197-2.23129.7740.7981.5692.723-0.87153.1310missingmissingmissingmissingmissingmissing0.01.90426bt6.24533
510000589.74413.5559.149116.3442.636284.584-0.541.36261.619278.8760.5880.4790.97553.6510.3711.32931.565-0.8841.85740.7352.237282.849390.547-2.412-0.65356.1650.2243.106193.660.0254338bt0.083414
6100008105.59450.559100.9894.288missingmissingmissing2.9044.28865.3330.675-1.366missing39.0082.433-2.53226.3250.211.88437.7910.024129.8040missingmissingmissingmissingmissingmissing0.01.6148bt5.296
7100010missing86.2479.69227.201missingmissingmissing2.33827.20181.7341.75-1.412missing29.718-0.8662.87852.0160.126-1.28851.2760.688250.1780missingmissingmissingmissingmissingmissing0.00.701141bt2.2995
8100011114.74410.28675.71230.8162.563252.599-1.4012.88836.745239.8041.0611.3640.76935.976-0.669-0.34238.188-0.1652.50222.3852.148290.547376.773-0.790.30356.8761.773-2.079165.640.093659bt0.30717
9100012145.29764.234103.565106.999missingmissingmissing2.18324.66192.2450.5760.689missing62.89-0.766-1.63236.2370.722-0.03543.91-1.907232.362193.117-0.971.943missingmissingmissing93.1170.51274bt1.68161
1010001382.48831.66364.1288.232missingmissingmissing2.8238.23258.6491.303-1.414missing25.47-0.654-2.9933.179-1.665-0.35412.4391.433163.420missingmissingmissingmissingmissingmissing0.00.66589bt2.18389
11100014missing109.41214.39817.323missingmissingmissing0.47217.32362.5651.774-0.272missing22.5521.3891.3440.0131.8561.41275.197-1.583198.6160missingmissingmissingmissingmissingmissing0.00.655922bt2.1512
12100016114.2564.35167.96347.221missingmissingmissing2.95426.243100.931.1450.218missing30.1450.484-0.92934.522-0.2151.94141.8992.055191.568136.263-0.766-0.686missingmissingmissing36.2630.443598bt1.45485
13100018missing85.18668.8275.042missingmissingmissing2.1165.04271.4431.558-1.351missing27.9311.1752.35643.5122.3320.58444.698-2.033151.8160missingmissingmissingmissingmissingmissing0.01.56163bt5.12162
32924149980missing87.30489.60414.486missingmissingmissing2.25214.48670.9130.597-1.411missing44.390.683-0.50726.523-1.3940.36474.8333.103149.5710missingmissingmissingmissingmissingmissing0.01.55172bt5.08911
32925149982missing93.3235.5189.338missingmissingmissing1.1259.33866.7541.829-1.341missing23.5991.7090.90143.1552.3920.00751.511-2.847144.5580missingmissingmissingmissingmissingmissing-0.00.643797bt2.11143
32926149983225.39783.38134.6870.3870.11462.4853.5353.40638.621237.1890.648-0.3660.053.6940.209-1.2734.799-1.8391.45150.239-1.537253.344364.0151.9381.02947.6731.8242.178148.6960.334829bt1.09813
3292714998493.89918.14166.7139.914missingmissingmissing2.9321.387109.3691.912-1.388missing24.2062.31.18846.272.179-2.16620.348-1.56584.865138.8931.2321.161missingmissingmissing38.8930.51274bt1.68161
3292814998594.09569.65463.508374.9190.649292.1521.0670.49875.552768.4081.560.9460.0102.883-0.4810.744160.477-0.3341.22122.7710.718799.3383320.4520.758-2.373143.8981.407-1.119505.0490.0254338bt0.083414
3292914998768.06828.45439.672209.3231.114165.8930.1420.8391.906353.4113.5941.4040.05325.186-2.1050.39590.511-1.628-0.29698.8760.006327.0342190.435-1.229-3.00347.279-0.1151.974237.7140.443598bt1.45485
3293014998891.59470.57457.61534.452missingmissingmissing2.8433.00989.1381.227-1.401missing25.4431.4-1.22831.2191.9392.26445.691-0.149111.575132.476-1.0572.877missingmissingmissing32.4760.617144bt2.02402
32931149989missing68.95776.1912.304missingmissingmissing2.322.30474.6111.072-1.414missing36.01-1.651-0.45238.601-0.594-2.51740.1811.63396.3910missingmissingmissingmissingmissingmissing0.01.43486bt4.70584
32932149992missing95.86490.31721.262missingmissingmissing2.90721.26264.7172.167-1.096missing20.4320.52.28244.285-1.5790.25151.998-2.795184.1460missingmissingmissingmissingmissingmissing0.00.832855bt2.73148
3293314999459.65367.64850.78324.749missingmissingmissing1.62124.74972.5221.717-1.414missing26.691-1.067-2.87245.83-0.939-1.25629.4111.087177.0770missingmissingmissingmissingmissingmissing0.01.41038bt4.62555
32934149995133.01329.19878.723110.5760.666281.596-0.0731.9656.406501.8540.4181.3980.073.1740.8191.3230.5810.952-0.63651.2070.12553.8573193.2970.14-1.79892.691-0.5261.653398.0990.0254338bt0.083414
32935149996missing125.73358.863174.7340.828171.433-0.1391.21634.733305.9292.2340.0070.06230.498-0.341.37568.1360.8691.245134.87-0.186409.8192149.11-0.234-2.23558.1840.5942.188207.2940.720062bt2.36156
# Tau pT: start a new histogram with the signal sample, then overlay the
# background sample on the same axes (the `!` variant adds to the current plot).
# alpha=0.4 makes the series semi-transparent so both stay visible.
@df signal histogram(:PRI_tau_pt, alpha=0.4, label="Signal - Tau pT")
@df background histogram!(:PRI_tau_pt, alpha=0.4, label="Background - Tau pT")
# Lepton pT: same signal/background overlay
@df signal histogram(:PRI_lep_pt, alpha=0.4, label="Signal - Lepton pT")
@df background histogram!(:PRI_lep_pt, alpha=0.4, label="Background - Lepton pT")
# Tau (eta, phi) scatter plot using every event of both samples
@df signal scatter(:PRI_tau_eta, :PRI_tau_phi, alpha=0.4, label="Signal")
@df background scatter!(:PRI_tau_eta, :PRI_tau_phi, alpha=0.4, label="Background")
# Plot only every `subsample_step`-th event to keep the scatter plot readable.
# The same index range must be used for η and ϕ: with different strides the two
# vectors have different lengths and a point would combine the η of one event
# with the ϕ of another.
subsample_step = 50
n_signal = nrow(signal)
@df signal scatter(:PRI_tau_eta[1:subsample_step:n_signal], :PRI_tau_phi[1:subsample_step:n_signal], alpha=0.4, label="Signal", title="(η, ϕ)")
n_background = nrow(background)
@df background scatter!(:PRI_tau_eta[1:subsample_step:n_background], :PRI_tau_phi[1:subsample_step:n_background], alpha=0.4, label="Background")
using Statistics
# Per-label mean and standard deviation of the tau pT, plus the group size
combine(groupby(higgs_ml, :Label), :PRI_tau_pt .=> [mean, std], nrow)
2×4 DataFrame
RowLabelPRI_tau_pt_meanPRI_tau_pt_stdnrow
String1Float64Float64Int64
1s46.254325.234417065
2b34.874419.427532935
# Per-label mean and standard deviation of the lepton pT, plus the group size
combine(groupby(higgs_ml, :Label), :PRI_lep_pt .=> [mean, std], nrow)
2×4 DataFrame
RowLabelPRI_lep_pt_meanPRI_lep_pt_stdnrow
String1Float64Float64Int64
1s45.613521.532417065
2b47.146222.165132935
# Distance measure between pairs of (η, ϕ) vectors

# Difference ϕ1 - ϕ2 wrapped into the interval [-π, π].
# `rem2pi` performs the wrap-around in a single step, so large differences do not
# need one loop iteration per 2π. Non-finite differences (±Inf, NaN) have no
# meaningful wrapped value and yield NaN instead of looping forever.
function δϕ(ϕ1, ϕ2)
    δ = float(ϕ1 - ϕ2)
    isfinite(δ) || return oftype(δ, NaN)
    return rem2pi(δ, RoundNearest)
end

# Euclidean distance in the (η, ϕ) plane, using the wrapped ϕ difference.
# `hypot` computes sqrt(a^2 + b^2) without intermediate overflow/underflow.
dist(η1::Number, ϕ1::Number, η2::Number, ϕ2::Number) = hypot(η1 - η2, δϕ(ϕ1, ϕ2))
# Fallback for any non-numeric argument (e.g. `missing`): no distance is defined
dist(η1, ϕ1, η2, ϕ2) = missing
dist (generic function with 2 methods)
# Row-wise (η, ϕ) distance between the tau and the lepton, stored as a new column
tau_lep_distance_spec =
    [:PRI_tau_eta, :PRI_tau_phi, :PRI_lep_eta, :PRI_lep_phi] => ByRow(dist) => :Tau_lep_distance
# New column order: EventId, the distance, all PRI columns, then everything else
select!(signal, :EventId, tau_lep_distance_spec, r"PRI.*", :)
select!(background, :EventId, tau_lep_distance_spec, r"PRI.*", :)
32935×36 DataFrame
32910 rows omitted
RowEventIdTau_lep_distancePRI_tau_ptPRI_tau_etaPRI_tau_phiPRI_lep_ptPRI_lep_etaPRI_lep_phiPRI_metPRI_met_phiPRI_met_sumetPRI_jet_numPRI_jet_leading_ptPRI_jet_leading_etaPRI_jet_leading_phiPRI_jet_subleading_ptPRI_jet_subleading_etaPRI_jet_subleading_phiPRI_jet_all_ptDER_mass_MMCDER_mass_transverse_met_lepDER_mass_visDER_pt_hDER_deltaeta_jet_jetDER_mass_jet_jetDER_prodeta_jet_jetDER_deltar_tau_lepDER_pt_totDER_sum_ptDER_pt_ratio_lep_tauDER_met_phi_centralityDER_lep_eta_centralityWeightLabelKaggleSetKaggleWeight
Int64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Int64Float64?Float64?Float64?Float64?Float64?Float64?Float64Float64?Float64Float64Float64Float64?Float64?Float64?Float64Float64Float64Float64Float64Float64?Float64String1String1Float64
11000013.473142.0142.039-3.01136.9180.5010.10344.704-1.916164.546146.2260.7251.158missingmissingmissing46.226160.93768.768103.23548.146missingmissingmissing3.4732.078125.1570.8791.414missing0.681042bt2.23358
21000023.1479732.154-0.705-2.093121.409-0.9531.05254.283-2.186260.414144.2512.053-2.028missingmissingmissing44.251missing162.172125.95335.635missingmissingmissing3.1489.336197.8143.7761.414missing0.715742bt2.34739
31000033.3099522.647-1.6550.0153.321-0.522-3.131.0820.0686.0620missingmissingmissingmissingmissingmissing-0.0143.90581.41780.9430.414missingmissingmissing3.310.41475.9682.354-1.285missing1.66065bt5.44638
41000043.8905328.209-2.197-2.23129.7740.7981.5692.723-0.87153.1310missingmissingmissingmissingmissingmissing0.0175.86416.915134.80516.405missingmissingmissing3.89116.40557.9831.056-1.385missing1.90426bt6.24533
51000051.3615553.6510.3711.32931.565-0.8841.85740.7352.237282.849390.547-2.412-0.65356.1650.2243.106193.6689.74413.5559.149116.3442.636284.584-0.541.36261.619278.8760.5880.4790.9750.0254338bt0.083414
61000082.9031239.0082.433-2.53226.3250.211.88437.7910.024129.8040missingmissingmissingmissingmissingmissing0.0105.59450.559100.9894.288missingmissingmissing2.9044.28865.3330.675-1.366missing1.6148bt5.296
71000102.3380629.718-0.8662.87852.0160.126-1.28851.2760.688250.1780missingmissingmissingmissingmissingmissing0.0missing86.2479.69227.201missingmissingmissing2.33827.20181.7341.75-1.412missing0.701141bt2.2995
81000112.8883135.976-0.669-0.34238.188-0.1652.50222.3852.148290.547376.773-0.790.30356.8761.773-2.079165.64114.74410.28675.71230.8162.563252.599-1.4012.88836.745239.8041.0611.3640.7690.093659bt0.30717
91000122.1827962.89-0.766-1.63236.2370.722-0.03543.91-1.907232.362193.117-0.971.943missingmissingmissing93.117145.29764.234103.565106.999missingmissingmissing2.18324.66192.2450.5760.689missing0.51274bt1.68161
101000132.8232325.47-0.654-2.9933.179-1.665-0.35412.4391.433163.420missingmissingmissingmissingmissingmissing0.082.48831.66364.1288.232missingmissingmissing2.8238.23258.6491.303-1.414missing0.66589bt2.18389
111000140.47251822.5521.3891.3440.0131.8561.41275.197-1.583198.6160missingmissingmissingmissingmissingmissing0.0missing109.41214.39817.323missingmissingmissing0.47217.32362.5651.774-0.272missing0.655922bt2.1512
121000162.953930.1450.484-0.92934.522-0.2151.94141.8992.055191.568136.263-0.766-0.686missingmissingmissing36.263114.2564.35167.96347.221missingmissingmissing2.95426.243100.931.1450.218missing0.443598bt1.45485
131000182.1162827.9311.1752.35643.5122.3320.58444.698-2.033151.8160missingmissingmissingmissingmissingmissing0.0missing85.18668.8275.042missingmissingmissing2.1165.04271.4431.558-1.351missing1.56163bt5.12162
329241499802.2522444.390.683-0.50726.523-1.3940.36474.8333.103149.5710missingmissingmissingmissingmissingmissing0.0missing87.30489.60414.486missingmissingmissing2.25214.48670.9130.597-1.411missing1.55172bt5.08911
329251499821.1250423.5991.7090.90143.1552.3920.00751.511-2.847144.5580missingmissingmissingmissingmissingmissing-0.0missing93.3235.5189.338missingmissingmissing1.1259.33866.7541.829-1.341missing0.643797bt2.11143
329261499833.405653.6940.209-1.2734.799-1.8391.45150.239-1.537253.344364.0151.9381.02947.6731.8242.178148.696225.39783.38134.6870.3870.11462.4853.5353.40638.621237.1890.648-0.3660.00.334829bt1.09813
329271499842.9316824.2062.31.18846.272.179-2.16620.348-1.56584.865138.8931.2321.161missingmissingmissing38.89393.89918.14166.7139.914missingmissingmissing2.9321.387109.3691.912-1.388missing0.51274bt1.68161
329281499850.498182102.883-0.4810.744160.477-0.3341.22122.7710.718799.3383320.4520.758-2.373143.8981.407-1.119505.04994.09569.65463.508374.9190.649292.1521.0670.49875.552768.4081.560.9460.00.0254338bt0.083414
329291499870.83964925.186-2.1050.39590.511-1.628-0.29698.8760.006327.0342190.435-1.229-3.00347.279-0.1151.974237.71468.06828.45439.672209.3231.114165.8930.1420.8391.906353.4113.5941.4040.0530.443598bt1.45485
329301499882.8427525.4431.4-1.22831.2191.9392.26445.691-0.149111.575132.476-1.0572.877missingmissingmissing32.47691.59470.57457.61534.452missingmissingmissing2.8433.00989.1381.227-1.401missing0.617144bt2.02402
329311499892.319836.01-1.651-0.45238.601-0.594-2.51740.1811.63396.3910missingmissingmissingmissingmissingmissing0.0missing68.95776.1912.304missingmissingmissing2.322.30474.6111.072-1.414missing1.43486bt4.70584
329321499922.9064120.4320.52.28244.285-1.5790.25151.998-2.795184.1460missingmissingmissingmissingmissingmissing0.0missing95.86490.31721.262missingmissingmissing2.90721.26264.7172.167-1.096missing0.832855bt2.73148
329331499941.6210626.691-1.067-2.87245.83-0.939-1.25629.4111.087177.0770missingmissingmissingmissingmissingmissing0.059.65367.64850.78324.749missingmissingmissing1.62124.74972.5221.717-1.414missing1.41038bt4.62555
329341499951.9605273.1740.8191.3230.5810.952-0.63651.2070.12553.8573193.2970.14-1.79892.691-0.5261.653398.099133.01329.19878.723110.5760.666281.596-0.0731.9656.406501.8540.4181.3980.00.0254338bt0.083414
329351499961.2159730.498-0.341.37568.1360.8691.245134.87-0.186409.8192149.11-0.234-2.23558.1840.5942.188207.294missing125.73358.863174.7340.828171.433-0.1391.21634.733305.9292.2340.0070.0620.720062bt2.36156
# Compare the tau-lepton distance distributions: start the histogram with the
# signal sample and overlay the background sample on the same plot
@df signal histogram(:Tau_lep_distance, alpha=0.4, label="Signal", title="τ - lepton distance")
@df background histogram!(:Tau_lep_distance, alpha=0.4, label="Background")